
# Name of this Scrapy project/bot.
BOT_NAME = "ama"

# Package that new spiders are generated into, and the packages searched
# for spiders.
NEWSPIDER_MODULE = "ama.spiders"
SPIDER_MODULES = ["ama.spiders"]


# Pool of User-Agent strings available for rotation.
# Every entry is unique: a repeated entry would bias a random pick toward
# that browser without adding any variety.
USER_AGENT_LIST = [
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4421.5 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36 Edg/90.0.818.62",
]

# Log level.
LOG_LEVEL = 'ERROR'

# User agent: do not use Scrapy's default one, it is trivially identified
# as a crawler.
USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; WOW64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/90.0.4421.5 Safari/537.36'
)

# Whether to obey robots.txt (disabled here).
ROBOTSTXT_OBEY = False

# Download delay between requests, in seconds (about one second is the
# suggested value).
DOWNLOAD_DELAY = 0.5

# Maximum number of concurrent requests overall.
CONCURRENT_REQUESTS = 600

# Per-IP and per-domain concurrency limits. Per the Scrapy settings
# documentation only one of the two is honored: a non-zero per-IP limit
# takes precedence and the download delay is then enforced per IP.
CONCURRENT_REQUESTS_PER_IP = 600
CONCURRENT_REQUESTS_PER_DOMAIN = 600

# Cookies must stay enabled, otherwise pages that need a session
# (e.g. crawling after login) may not work.
COOKIES_ENABLED = True

# Whether the telnet console is enabled.
TELNETCONSOLE_ENABLED = True

# Default headers added to every request.
# The user-agent entry must use the real HTTP field name "User-Agent";
# a key spelled "USER_AGENT" is sent as a separate, non-standard header.
DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en',
    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4421.5 Safari/537.36",
}

# Spider middlewares, mapped to their order values.
SPIDER_MIDDLEWARES = {
    "ama.middlewares.AmaSpiderMiddleware": 543,
}

# Downloader middlewares, mapped to their order values.
DOWNLOADER_MIDDLEWARES = {
    "ama.middlewares.AmaDownloaderMiddleware": 543,
    "ama.middlewares.RandomUserAgentmiddleware": 544,
    # "ama.middlewares.Hhandle": 545,  # currently disabled
    "ama.middlewares.JdongMiddleware": 546,
}

# Extensions
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Item pipelines, mapped to their order values.
ITEM_PIPELINES = {
    "ama.pipelines.AmaPipeline": 300,
    "ama.pipelines.AmaPipeline1": 400,
}

# AutoThrottle (automatic rate limiting)
#AUTOTHROTTLE_ENABLED = True

# Initial download delay
#AUTOTHROTTLE_START_DELAY = 5

# Maximum download delay
#AUTOTHROTTLE_MAX_DELAY = 60

#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Whether to enable the HTTP cache
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
# Disable request retries.
RETRY_ENABLED = False